/*
 * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.impl.xs.models;

import com.sun.org.apache.xerces.internal.xni.QName;
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet;
import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler;
import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException;
import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints;

import java.util.Vector;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * DFAContentModel is the implementation of XSCMValidator that does
 * all of the non-trivial element content validation. This class does
 * the conversion from the regular expression to the DFA that
 * it then uses in its validation algorithm.
 *
 * @author Neil Graham, IBM
 * @version $Id: XSDFACM.java,v 1.9 2010/08/06 23:49:43 joehw Exp $
 * @xerces.internal
 */
public class XSDFACM
    implements XSCMValidator {

  //
  // Constants
  //
  private static final boolean DEBUG = false;

  // special strings

  // debugging

  /**
   * Set to true to debug content model validation.
   */
  private static final boolean DEBUG_VALIDATE_CONTENT = false;

  //
  // Data
  //

  /**
   * This is the map of unique input symbol elements to indices into
   * each state's per-input symbol transition table entry. This is part
   * of the built DFA information that must be kept around to do the
   * actual validation.  Note tat since either XSElementDecl or XSParticleDecl object
   * can live here, we've got to use an Object.
   */
  private Object fElemMap[] = null;

  /**
   * This is a map of whether the element map contains information
   * related to ANY models.
   */
  private int fElemMapType[] = null;

  /**
   * id of the unique input symbol
   */
  private int fElemMapId[] = null;

  /**
   * The element map size.
   */
  private int fElemMapSize = 0;

  /**
   * This is an array of booleans, one per state (there are
   * fTransTableSize states in the DFA) that indicates whether that
   * state is a final state.
   */
  private boolean fFinalStateFlags[] = null;

  /**
   * The list of follow positions for each NFA position (i.e. for each
   * non-epsilon leaf node.) This is only used during the building of
   * the DFA, and is let go afterwards.
   */
  private CMStateSet fFollowList[] = null;

  /**
   * This is the head node of our intermediate representation. It is
   * only non-null during the building of the DFA (just so that it
   * does not have to be passed all around.) Once the DFA is built,
   * this is no longer required so its nulled out.
   */
  private CMNode fHeadNode = null;

  /**
   * The count of leaf nodes. This is an important number that set some
   * limits on the sizes of data structures in the DFA process.
   */
  private int fLeafCount = 0;

  /**
   * An array of non-epsilon leaf nodes, which is used during the DFA
   * build operation, then dropped.
   */
  private XSCMLeaf fLeafList[] = null;

  /**
   * Array mapping ANY types to the leaf list.
   */
  private int fLeafListType[] = null;

  /**
   * This is the transition table that is the main by product of all
   * of the effort here. It is an array of arrays of ints. The first
   * dimension is the number of states we end up with in the DFA. The
   * second dimensions is the number of unique elements in the content
   * model (fElemMapSize). Each entry in the second dimension indicates
   * the new state given that input for the first dimension's start
   * state.
   * <p>
   * The fElemMap array handles mapping from element indexes to
   * positions in the second dimension of the transition table.
   */
  private int fTransTable[][] = null;
  /**
   * Array containing occurence information for looping states
   * which use counters to check minOccurs/maxOccurs.
   */
  private Occurence[] fCountingStates = null;

  static final class Occurence {

    final int minOccurs;
    final int maxOccurs;
    final int elemIndex;

    public Occurence(XSCMRepeatingLeaf leaf, int elemIndex) {
      minOccurs = leaf.getMinOccurs();
      maxOccurs = leaf.getMaxOccurs();
      this.elemIndex = elemIndex;
    }

    public String toString() {
      return "minOccurs=" + minOccurs
          + ";maxOccurs=" +
          ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED)
              ? Integer.toString(maxOccurs) : "unbounded");
    }
  }

  /**
   * The number of valid entries in the transition table, and in the other
   * related tables such as fFinalStateFlags.
   */
  private int fTransTableSize = 0;

  /**
   * Array of counters for all the for elements (or wildcards)
   * of the form a{n,m} where n > 1 and m <= unbounded. Used
   * to count the a's to later check against n and m. Counter
   * set to -1 if element (or wildcard) not optimized by
   * constant space algorithm.
   */
  private int fElemMapCounter[];

  /**
   * Array of lower bounds for all the for elements (or wildcards)
   * of the form a{n,m} where n > 1 and m <= unbounded. This array
   * stores the n's for those elements (or wildcards) for which
   * the constant space algorithm applies (or -1 otherwise).
   */
  private int fElemMapCounterLowerBound[];

  /**
   * Array of upper bounds for all the for elements (or wildcards)
   * of the form a{n,m} where n > 1 and m <= unbounded. This array
   * stores the n's for those elements (or wildcards) for which
   * the constant space algorithm applies, or -1 if algorithm does
   * not apply or m = unbounded.
   */
  private int fElemMapCounterUpperBound[];   // -1 if no upper bound

  // temp variables

  //
  // Constructors
  //

  /**
   * Constructs a DFA content model.
   *
   * @param syntaxTree The syntax tree of the content model.
   * @param leafCount The number of leaves.
   * @throws RuntimeException Thrown if DFA can't be built.
   */

  public XSDFACM(CMNode syntaxTree, int leafCount) {

    // Store away our index and pools in members
    fLeafCount = leafCount;

    //
    //  Create some string pool indexes that represent the names of some
    //  magical nodes in the syntax tree.
    //  (already done in static initialization...
    //

    //
    //  Ok, so lets grind through the building of the DFA. This method
    //  handles the high level logic of the algorithm, but it uses a
    //  number of helper classes to do its thing.
    //
    //  In order to avoid having hundreds of references to the error and
    //  string handlers around, this guy and all of his helper classes
    //  just throw a simple exception and we then pass it along.
    //

    if (DEBUG_VALIDATE_CONTENT) {
      XSDFACM.time -= System.currentTimeMillis();
    }

    buildDFA(syntaxTree);

    if (DEBUG_VALIDATE_CONTENT) {
      XSDFACM.time += System.currentTimeMillis();
      System.out.println("DFA build: " + XSDFACM.time + "ms");
    }
  }

  private static long time = 0;

  //
  // XSCMValidator methods
  //

  /**
   * check whether the given state is one of the final states
   *
   * @param state the state to check
   * @return whether it's a final state
   */
  public boolean isFinalState(int state) {
    return (state < 0) ? false :
        fFinalStateFlags[state];
  }

  /**
   * one transition only
   *
   * @param curElem The current element's QName
   * @param state stack to store the previous state
   * @param subGroupHandler the substitution group handler
   * @return null if transition is invalid; otherwise the Object corresponding to the XSElementDecl
   * or XSWildcardDecl identified.  Also, the state array will be modified to include the new state;
   * this so that the validator can store it away.
   * @throws RuntimeException thrown on error
   */
  public Object oneTransition(QName curElem, int[] state,
      SubstitutionGroupHandler subGroupHandler) {
    int curState = state[0];

    if (curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) {
      // there was an error last time; so just go find correct Object in fElemmMap.
      // ... after resetting state[0].
      if (curState == XSCMValidator.FIRST_ERROR) {
        state[0] = XSCMValidator.SUBSEQUENT_ERROR;
      }

      return findMatchingDecl(curElem, subGroupHandler);
    }

    int nextState = 0;
    int elemIndex = 0;
    Object matchingDecl = null;

    for (; elemIndex < fElemMapSize; elemIndex++) {
      nextState = fTransTable[curState][elemIndex];
      if (nextState == -1) {
        continue;
      }
      int type = fElemMapType[elemIndex];
      if (type == XSParticleDecl.PARTICLE_ELEMENT) {
        matchingDecl = subGroupHandler
            .getMatchingElemDecl(curElem, (XSElementDecl) fElemMap[elemIndex]);
        if (matchingDecl != null) {
          // Increment counter if constant space algorithm applies
          if (fElemMapCounter[elemIndex] >= 0) {
            fElemMapCounter[elemIndex]++;
          }
          break;
        }
      } else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
        if (((XSWildcardDecl) fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
          matchingDecl = fElemMap[elemIndex];
          // Increment counter if constant space algorithm applies
          if (fElemMapCounter[elemIndex] >= 0) {
            fElemMapCounter[elemIndex]++;
          }
          break;
        }
      }
    }

    // if we still can't find a match, set the state to first_error
    // and return null
    if (elemIndex == fElemMapSize) {
      state[1] = state[0];
      state[0] = XSCMValidator.FIRST_ERROR;
      return findMatchingDecl(curElem, subGroupHandler);
    }

    if (fCountingStates != null) {
      Occurence o = fCountingStates[curState];
      if (o != null) {
        if (curState == nextState) {
          if (++state[2] > o.maxOccurs &&
              o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            // It's likely that we looped too many times on the current state
            // however it's possible that we actually matched another particle
            // which allows the same name.
            //
            // Consider:
            //
            // <xs:sequence>
            //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
            //  <xs:element name="foo" type="xs:string" fixed="bar"/>
            // </xs:sequence>
            //
            // and
            //
            // <xs:sequence>
            //  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
            //  <xs:any namespace="##any" processContents="skip"/>
            // </xs:sequence>
            //
            // In the DFA there will be two transitions from the current state which
            // allow "foo". Note that this is not a UPA violation. The ambiguity of which
            // transition to take is resolved by the current value of the counter. Since
            // we've already seen enough instances of the first "foo" perhaps there is
            // another element declaration or wildcard deeper in the element map which
            // matches.
            return findMatchingDecl(curElem, state, subGroupHandler, elemIndex);
          }
        } else if (state[2] < o.minOccurs) {
          // not enough loops on the current state.
          state[1] = state[0];
          state[0] = XSCMValidator.FIRST_ERROR;
          return findMatchingDecl(curElem, subGroupHandler);
        } else {
          // Exiting a counting state. If we're entering a new
          // counting state, reset the counter.
          o = fCountingStates[nextState];
          if (o != null) {
            state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
          }
        }
      } else {
        o = fCountingStates[nextState];
        if (o != null) {
          // Entering a new counting state. Reset the counter.
          // If we've already seen one instance of the looping
          // particle set the counter to 1, otherwise set it
          // to 0.
          state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
        }
      }
    }

    state[0] = nextState;
    return matchingDecl;
  } // oneTransition(QName, int[], SubstitutionGroupHandler):  Object

  Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
    Object matchingDecl = null;

    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
      int type = fElemMapType[elemIndex];
      if (type == XSParticleDecl.PARTICLE_ELEMENT) {
        matchingDecl = subGroupHandler
            .getMatchingElemDecl(curElem, (XSElementDecl) fElemMap[elemIndex]);
        if (matchingDecl != null) {
          return matchingDecl;
        }
      } else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
        if (((XSWildcardDecl) fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
          return fElemMap[elemIndex];
        }
      }
    }

    return null;
  } // findMatchingDecl(QName, SubstitutionGroupHandler): Object

  Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler,
      int elemIndex) {

    int curState = state[0];
    int nextState = 0;
    Object matchingDecl = null;

    while (++elemIndex < fElemMapSize) {
      nextState = fTransTable[curState][elemIndex];
      if (nextState == -1) {
        continue;
      }
      int type = fElemMapType[elemIndex];
      if (type == XSParticleDecl.PARTICLE_ELEMENT) {
        matchingDecl = subGroupHandler
            .getMatchingElemDecl(curElem, (XSElementDecl) fElemMap[elemIndex]);
        if (matchingDecl != null) {
          break;
        }
      } else if (type == XSParticleDecl.PARTICLE_WILDCARD) {
        if (((XSWildcardDecl) fElemMap[elemIndex]).allowNamespace(curElem.uri)) {
          matchingDecl = fElemMap[elemIndex];
          break;
        }
      }
    }

    // if we still can't find a match, set the state to FIRST_ERROR and return null
    if (elemIndex == fElemMapSize) {
      state[1] = state[0];
      state[0] = XSCMValidator.FIRST_ERROR;
      return findMatchingDecl(curElem, subGroupHandler);
    }

    // if we found a match, set the next state and reset the
    // counter if the next state is a counting state.
    state[0] = nextState;
    final Occurence o = fCountingStates[nextState];
    if (o != null) {
      state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
    }
    return matchingDecl;
  } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object

  // This method returns the start states of the content model.
  public int[] startContentModel() {
    // Clear all constant space algorithm counters in use
    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
      if (fElemMapCounter[elemIndex] != -1) {
        fElemMapCounter[elemIndex] = 0;
      }
    }
    // [0] : the current state
    // [1] : if [0] is an error state then the
    //       last valid state before the error
    // [2] : occurence counter for counting states
    return new int[3];
  } // startContentModel():int[]

  // this method returns whether the last state was a valid final state
  public boolean endContentModel(int[] state) {
    final int curState = state[0];
    if (fFinalStateFlags[curState]) {
      if (fCountingStates != null) {
        Occurence o = fCountingStates[curState];
        if (o != null && state[2] < o.minOccurs) {
          // not enough loops on the current state to be considered final.
          return false;
        }
      }
      return true;
    }
    return false;
  } // endContentModel(int[]):  boolean

  // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
  // but we can put it back later.

  //
  // Private methods
  //

  /**
   * Builds the internal DFA transition table from the given syntax tree.
   *
   * @param syntaxTree The syntax tree.
   * @throws RuntimeException Thrown if DFA cannot be built.
   */
  private void buildDFA(CMNode syntaxTree) {
    //
    //  The first step we need to take is to rewrite the content model
    //  using our CMNode objects, and in the process get rid of any
    //  repetition short cuts, converting them into '*' style repetitions
    //  or getting rid of repetitions altogether.
    //
    //  The conversions done are:
    //
    //  x+ -> (x|x*)
    //  x? -> (x|epsilon)
    //
    //  This is a relatively complex scenario. What is happening is that
    //  we create a top level binary node of which the special EOC value
    //  is set as the right side node. The the left side is set to the
    //  rewritten syntax tree. The source is the original content model
    //  info from the decl pool. The rewrite is done by buildSyntaxTree()
    //  which recurses the decl pool's content of the element and builds
    //  a new tree in the process.
    //
    //  Note that, during this operation, we set each non-epsilon leaf
    //  node's DFA state position and count the number of such leafs, which
    //  is left in the fLeafCount member.
    //
    //  The nodeTmp object is passed in just as a temp node to use during
    //  the recursion. Otherwise, we'd have to create a new node on every
    //  level of recursion, which would be piggy in Java (as is everything
    //  for that matter.)
    //

        /* MODIFIED (Jan, 2001)
         *
         * Use following rules.
         *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
         *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
         *
         * The same computation of follow as x* is applied to x+
         *
         * The modification drastically reduces computation time of
         * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
         */

    //
    //  And handle specially the EOC node, which also must be numbered
    //  and counted as a non-epsilon leaf node. It could not be handled
    //  in the above tree build because it was created before all that
    //  started. We save the EOC position since its used during the DFA
    //  building loop.
    //
    int EOCPos = fLeafCount;
    XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
    fHeadNode = new XSCMBinOp(
        XSModelGroupImpl.MODELGROUP_SEQUENCE,
        syntaxTree,
        nodeEOC
    );

    //
    //  Ok, so now we have to iterate the new tree and do a little more
    //  work now that we know the leaf count. One thing we need to do is
    //  to calculate the first and last position sets of each node. This
    //  is cached away in each of the nodes.
    //
    //  Along the way we also set the leaf count in each node as the
    //  maximum state count. They must know this in order to create their
    //  first/last pos sets.
    //
    //  We also need to build an array of references to the non-epsilon
    //  leaf nodes. Since we iterate it in the same way as before, this
    //  will put them in the array according to their position values.
    //
    fLeafList = new XSCMLeaf[fLeafCount];
    fLeafListType = new int[fLeafCount];
    postTreeBuildInit(fHeadNode);

    //
    //  And, moving onward... We now need to build the follow position
    //  sets for all the nodes. So we allocate an array of state sets,
    //  one for each leaf node (i.e. each DFA position.)
    //
    fFollowList = new CMStateSet[fLeafCount];
    for (int index = 0; index < fLeafCount; index++) {
      fFollowList[index] = new CMStateSet(fLeafCount);
    }
    calcFollowList(fHeadNode);
    //
    //  And finally the big push... Now we build the DFA using all the
    //  states and the tree we've built up. First we set up the various
    //  data structures we are going to use while we do this.
    //
    //  First of all we need an array of unique element names in our
    //  content model. For each transition table entry, we need a set of
    //  contiguous indices to represent the transitions for a particular
    //  input element. So we need to a zero based range of indexes that
    //  map to element types. This element map provides that mapping.
    //
    fElemMap = new Object[fLeafCount];
    fElemMapType = new int[fLeafCount];
    fElemMapId = new int[fLeafCount];

    fElemMapCounter = new int[fLeafCount];
    fElemMapCounterLowerBound = new int[fLeafCount];
    fElemMapCounterUpperBound = new int[fLeafCount];

    fElemMapSize = 0;
    Occurence[] elemOccurenceMap = null;

    for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
      // optimization from Henry Zongaro:
      //fElemMap[outIndex] = new Object ();
      fElemMap[outIndex] = null;

      int inIndex = 0;
      final int id = fLeafList[outIndex].getParticleId();
      for (; inIndex < fElemMapSize; inIndex++) {
        if (id == fElemMapId[inIndex]) {
          break;
        }
      }

      // If it was not in the list, then add it, if not the EOC node
      if (inIndex == fElemMapSize) {
        XSCMLeaf leaf = fLeafList[outIndex];
        fElemMap[fElemMapSize] = leaf.getLeaf();
        if (leaf instanceof XSCMRepeatingLeaf) {
          if (elemOccurenceMap == null) {
            elemOccurenceMap = new Occurence[fLeafCount];
          }
          elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize);
        }

        fElemMapType[fElemMapSize] = fLeafListType[outIndex];
        fElemMapId[fElemMapSize] = id;

        // Init counters and bounds for a{n,m} algorithm
        int[] bounds = (int[]) leaf.getUserData();
        if (bounds != null) {
          fElemMapCounter[fElemMapSize] = 0;
          fElemMapCounterLowerBound[fElemMapSize] = bounds[0];
          fElemMapCounterUpperBound[fElemMapSize] = bounds[1];
        } else {
          fElemMapCounter[fElemMapSize] = -1;
          fElemMapCounterLowerBound[fElemMapSize] = -1;
          fElemMapCounterUpperBound[fElemMapSize] = -1;
        }

        fElemMapSize++;
      }
    }

    // the last entry in the element map must be the EOC element.
    // remove it from the map.
    if (DEBUG) {
      if (fElemMapId[fElemMapSize - 1] != -1) {
        System.err.println("interal error in DFA: last element is not EOC.");
      }
    }
    fElemMapSize--;

    /***
     * Optimization(Jan, 2001); We sort fLeafList according to
     * elemIndex which is *uniquely* associated to each leaf.
     * We are *assuming* that each element appears in at least one leaf.
     **/

    int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
    int fSortCount = 0;

    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
      final int id = fElemMapId[elemIndex];
      for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
        if (id == fLeafList[leafIndex].getParticleId()) {
          fLeafSorter[fSortCount++] = leafIndex;
        }
      }
      fLeafSorter[fSortCount++] = -1;
    }

        /* Optimization(Jan, 2001) */

    //
    //  Next lets create some arrays, some that hold transient
    //  information during the DFA build and some that are permament.
    //  These are kind of sticky since we cannot know how big they will
    //  get, but we don't want to use any Java collections because of
    //  performance.
    //
    //  Basically they will probably be about fLeafCount*2 on average,
    //  but can be as large as 2^(fLeafCount*2), worst case. So we start
    //  with fLeafCount*4 as a middle ground. This will be very unlikely
    //  to ever have to expand, though it if does, the overhead will be
    //  somewhat ugly.
    //
    int curArraySize = fLeafCount * 4;
    CMStateSet[] statesToDo = new CMStateSet[curArraySize];
    fFinalStateFlags = new boolean[curArraySize];
    fTransTable = new int[curArraySize][];

    //
    //  Ok we start with the initial set as the first pos set of the
    //  head node (which is the seq node that holds the content model
    //  and the EOC node.)
    //
    CMStateSet setT = fHeadNode.firstPos();

    //
    //  Init our two state flags. Basically the unmarked state counter
    //  is always chasing the current state counter. When it catches up,
    //  that means we made a pass through that did not add any new states
    //  to the lists, at which time we are done. We could have used a
    //  expanding array of flags which we used to mark off states as we
    //  complete them, but this is easier though less readable maybe.
    //
    int unmarkedState = 0;
    int curState = 0;

    //
    //  Init the first transition table entry, and put the initial state
    //  into the states to do list, then bump the current state.
    //
    fTransTable[curState] = makeDefStateList();
    statesToDo[curState] = setT;
    curState++;

        /* Optimization(Jan, 2001); This is faster for
         * a large content model such as, "(t001+|t002+|.... |t500+)".
         */

    HashMap stateTable = new HashMap();

        /* Optimization(Jan, 2001) */

    //
    //  Ok, almost done with the algorithm... We now enter the
    //  loop where we go until the states done counter catches up with
    //  the states to do counter.
    //
    while (unmarkedState < curState) {
      //
      //  Get the first unmarked state out of the list of states to do.
      //  And get the associated transition table entry.
      //
      setT = statesToDo[unmarkedState];
      int[] transEntry = fTransTable[unmarkedState];

      // Mark this one final if it contains the EOC state
      fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);

      // Bump up the unmarked state count, marking this state done
      unmarkedState++;

      // Loop through each possible input symbol in the element map
      CMStateSet newSet = null;
            /* Optimization(Jan, 2001) */
      int sorterIndex = 0;
            /* Optimization(Jan, 2001) */
      for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
        //
        //  Build up a set of states which is the union of all of
        //  the follow sets of DFA positions that are in the current
        //  state. If we gave away the new set last time through then
        //  create a new one. Otherwise, zero out the existing one.
        //
        if (newSet == null) {
          newSet = new CMStateSet(fLeafCount);
        } else {
          newSet.zeroBits();
        }

                /* Optimization(Jan, 2001) */
        int leafIndex = fLeafSorter[sorterIndex++];

        while (leafIndex != -1) {
          // If this leaf index (DFA position) is in the current set...
          if (setT.getBit(leafIndex)) {
            //
            //  If this leaf is the current input symbol, then we
            //  want to add its follow list to the set of states to
            //  transition to from the current state.
            //
            newSet.union(fFollowList[leafIndex]);
          }

          leafIndex = fLeafSorter[sorterIndex++];
        }
                /* Optimization(Jan, 2001) */

        //
        //  If this new set is not empty, then see if its in the list
        //  of states to do. If not, then add it.
        //
        if (!newSet.isEmpty()) {
          //
          //  Search the 'states to do' list to see if this new
          //  state set is already in there.
          //

                    /* Optimization(Jan, 2001) */
          Integer stateObj = (Integer) stateTable.get(newSet);
          int stateIndex = (stateObj == null ? curState : stateObj.intValue());
                    /* Optimization(Jan, 2001) */

          // If we did not find it, then add it
          if (stateIndex == curState) {
            //
            //  Put this new state into the states to do and init
            //  a new entry at the same index in the transition
            //  table.
            //
            statesToDo[curState] = newSet;
            fTransTable[curState] = makeDefStateList();

                        /* Optimization(Jan, 2001) */
            stateTable.put(newSet, new Integer(curState));
                        /* Optimization(Jan, 2001) */

            // We now have a new state to do so bump the count
            curState++;

            //
            //  Null out the new set to indicate we adopted it.
            //  This will cause the creation of a new set on the
            //  next time around the loop.
            //
            newSet = null;
          }

          //
          //  Now set this state in the transition table's entry
          //  for this element (using its index), with the DFA
          //  state we will move to from the current state when we
          //  see this input element.
          //
          transEntry[elemIndex] = stateIndex;

          // Expand the arrays if we're full
          if (curState == curArraySize) {
            //
            //  Yikes, we overflowed the initial array size, so
            //  we've got to expand all of these arrays. So adjust
            //  up the size by 50% and allocate new arrays.
            //
            final int newSize = (int) (curArraySize * 1.5);
            CMStateSet[] newToDo = new CMStateSet[newSize];
            boolean[] newFinalFlags = new boolean[newSize];
            int[][] newTransTable = new int[newSize][];

            // Copy over all of the existing content
            System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
            System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
            System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);

            // Store the new array size
            curArraySize = newSize;
            statesToDo = newToDo;
            fFinalStateFlags = newFinalFlags;
            fTransTable = newTransTable;
          }
        }
      }
    }

    //
    // Fill in the occurence information for each looping state
    // if we're using counters.
    //
    if (elemOccurenceMap != null) {
      fCountingStates = new Occurence[curState];
      for (int i = 0; i < curState; ++i) {
        int[] transitions = fTransTable[i];
        for (int j = 0; j < transitions.length; ++j) {
          if (i == transitions[j]) {
            fCountingStates[i] = elemOccurenceMap[j];
            break;
          }
        }
      }
    }

    //
    //  And now we can say bye bye to the temp representation since we've
    //  built the DFA.
    //
    if (DEBUG_VALIDATE_CONTENT) {
      dumpTree(fHeadNode, 0);
    }
    fHeadNode = null;
    fLeafList = null;
    fFollowList = null;
    fLeafListType = null;
    fElemMapId = null;
  }

  /**
   * Calculates the follow list of the current node.
   *
   * @param nodeCur The curent node.
   * @throws RuntimeException Thrown if follow list cannot be calculated.
   */
  private void calcFollowList(CMNode nodeCur) {
    // Recurse as required
    if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) {
      // Recurse only
      calcFollowList(((XSCMBinOp) nodeCur).getLeft());
      calcFollowList(((XSCMBinOp) nodeCur).getRight());
    } else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
      // Recurse first
      calcFollowList(((XSCMBinOp) nodeCur).getLeft());
      calcFollowList(((XSCMBinOp) nodeCur).getRight());

      //
      //  Now handle our level. We use our left child's last pos
      //  set and our right child's first pos set, so go ahead and
      //  get them ahead of time.
      //
      final CMStateSet last = ((XSCMBinOp) nodeCur).getLeft().lastPos();
      final CMStateSet first = ((XSCMBinOp) nodeCur).getRight().firstPos();

      //
      //  Now, for every position which is in our left child's last set
      //  add all of the states in our right child's first set to the
      //  follow set for that position.
      //
      for (int index = 0; index < fLeafCount; index++) {
        if (last.getBit(index)) {
          fFollowList[index].union(first);
        }
      }
    } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE
        || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) {
      // Recurse first
      calcFollowList(((XSCMUniOp) nodeCur).getChild());

      //
      //  Now handle our level. We use our own first and last position
      //  sets, so get them up front.
      //
      final CMStateSet first = nodeCur.firstPos();
      final CMStateSet last = nodeCur.lastPos();

      //
      //  For every position which is in our last position set, add all
      //  of our first position states to the follow set for that
      //  position.
      //
      for (int index = 0; index < fLeafCount; index++) {
        if (last.getBit(index)) {
          fFollowList[index].union(first);
        }
      }
    } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
      // Recurse only
      calcFollowList(((XSCMUniOp) nodeCur).getChild());
    }

  }

  /**
   * Dumps the tree of the current node to standard output.
   *
   * @param nodeCur The current node.
   * @param level The maximum levels to output.
   * @throws RuntimeException Thrown on error.
   */
  private void dumpTree(CMNode nodeCur, int level) {
    for (int index = 0; index < level; index++) {
      System.out.print("   ");
    }

    int type = nodeCur.type();

    switch (type) {

      case XSModelGroupImpl.MODELGROUP_CHOICE:
      case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
        if (type == XSModelGroupImpl.MODELGROUP_CHOICE) {
          System.out.print("Choice Node ");
        } else {
          System.out.print("Seq Node ");
        }

        if (nodeCur.isNullable()) {
          System.out.print("Nullable ");
        }

        System.out.print("firstPos=");
        System.out.print(nodeCur.firstPos().toString());
        System.out.print(" lastPos=");
        System.out.println(nodeCur.lastPos().toString());

        dumpTree(((XSCMBinOp) nodeCur).getLeft(), level + 1);
        dumpTree(((XSCMBinOp) nodeCur).getRight(), level + 1);
        break;
      }
      case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
      case XSParticleDecl.PARTICLE_ONE_OR_MORE:
      case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
        System.out.print("Rep Node ");

        if (nodeCur.isNullable()) {
          System.out.print("Nullable ");
        }

        System.out.print("firstPos=");
        System.out.print(nodeCur.firstPos().toString());
        System.out.print(" lastPos=");
        System.out.println(nodeCur.lastPos().toString());

        dumpTree(((XSCMUniOp) nodeCur).getChild(), level + 1);
        break;
      }
      case XSParticleDecl.PARTICLE_ELEMENT: {
        System.out.print
            (
                "Leaf: (pos="
                    + ((XSCMLeaf) nodeCur).getPosition()
                    + "), "
                    + "(elemIndex="
                    + ((XSCMLeaf) nodeCur).getLeaf()
                    + ") "
            );

        if (nodeCur.isNullable()) {
          System.out.print(" Nullable ");
        }

        System.out.print("firstPos=");
        System.out.print(nodeCur.firstPos().toString());
        System.out.print(" lastPos=");
        System.out.println(nodeCur.lastPos().toString());
        break;
      }
      case XSParticleDecl.PARTICLE_WILDCARD:
        System.out.print("Any Node: ");

        System.out.print("firstPos=");
        System.out.print(nodeCur.firstPos().toString());
        System.out.print(" lastPos=");
        System.out.println(nodeCur.lastPos().toString());
        break;
      default: {
        throw new RuntimeException("ImplementationMessages.VAL_NIICM");
      }
    }

  }


  /**
   * -1 is used to represent bad transitions in the transition table
   * entry for each state. So each entry is initialized to an all -1
   * array. This method creates a new entry and initializes it.
   */
  private int[] makeDefStateList() {
    int[] retArray = new int[fElemMapSize];
    for (int index = 0; index < fElemMapSize; index++) {
      retArray[index] = -1;
    }
    return retArray;
  }

  /**
   * Post tree build initialization.
   */
  private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException {
    // Set the maximum states on this node
    nodeCur.setMaxStates(fLeafCount);

    XSCMLeaf leaf = null;
    int pos = 0;
    // Recurse as required
    if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) {
      leaf = (XSCMLeaf) nodeCur;
      pos = leaf.getPosition();
      fLeafList[pos] = leaf;
      fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD;
    } else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) ||
        (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) {
      postTreeBuildInit(((XSCMBinOp) nodeCur).getLeft());
      postTreeBuildInit(((XSCMBinOp) nodeCur).getRight());
    } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
        nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
        nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
      postTreeBuildInit(((XSCMUniOp) nodeCur).getChild());
    } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) {
      //  Put this node in the leaf list at the current index if its
      //  a non-epsilon leaf.
      leaf = (XSCMLeaf) nodeCur;
      pos = leaf.getPosition();
      fLeafList[pos] = leaf;
      fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT;
    } else {
      throw new RuntimeException("ImplementationMessages.VAL_NIICM");
    }
  }

  /**
   * check whether this content violates UPA constraint.
   *
   * @param subGroupHandler the substitution group handler
   * @return true if this content model contains other or list wildcard
   */
  public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler)
      throws XMLSchemaException {
    // Unique Particle Attribution
    // store the conflict results between any two elements in fElemMap
    // 0: not compared; -1: no conflict; 1: conflict
    // initialize the conflict table (all 0 initially)
    byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize];

    // for each state, check whether it has overlap transitions
    for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) {
      for (int j = 0; j < fElemMapSize; j++) {
        for (int k = j + 1; k < fElemMapSize; k++) {
          if (fTransTable[i][j] != -1 &&
              fTransTable[i][k] != -1) {
            if (conflictTable[j][k] == 0) {
              if (XSConstraints.overlapUPA
                  (fElemMap[j], fElemMap[k],
                      subGroupHandler)) {
                if (fCountingStates != null) {
                  Occurence o = fCountingStates[i];
                  // If "i" is a counting state and exactly one of the transitions
                  // loops back to "i" then the two particles do not overlap if
                  // minOccurs == maxOccurs.
                  if (o != null &&
                      fTransTable[i][j] == i ^ fTransTable[i][k] == i &&
                      o.minOccurs == o.maxOccurs) {
                    conflictTable[j][k] = (byte) -1;
                    continue;
                  }
                }
                conflictTable[j][k] = (byte) 1;
              } else {
                conflictTable[j][k] = (byte) -1;
              }
            }
          }
        }
      }
    }

    // report all errors
    for (int i = 0; i < fElemMapSize; i++) {
      for (int j = 0; j < fElemMapSize; j++) {
        if (conflictTable[i][j] == 1) {
          //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(),
          //                                             fElemMap[j].toString()});
          // REVISIT: do we want to report all errors? or just one?
          throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(),
              fElemMap[j].toString()});
        }
      }
    }

    // if there is a other or list wildcard, we need to check this CM
    // again, if this grammar is cached.
    for (int i = 0; i < fElemMapSize; i++) {
      if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) {
        XSWildcardDecl wildcard = (XSWildcardDecl) fElemMap[i];
        if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST ||
            wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
          return true;
        }
      }
    }

    return false;
  }

  /**
   * Check which elements are valid to appear at this point. This method also
   * works if the state is in error, in which case it returns what should
   * have been seen.
   *
   * @param state the current state
   * @return a Vector whose entries are instances of either XSWildcardDecl or XSElementDecl.
   */
  public Vector whatCanGoHere(int[] state) {
    int curState = state[0];
    if (curState < 0) {
      curState = state[1];
    }
    Occurence o = (fCountingStates != null) ?
        fCountingStates[curState] : null;
    int count = state[2];

    Vector ret = new Vector();
    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
      int nextState = fTransTable[curState][elemIndex];
      if (nextState != -1) {
        if (o != null) {
          if (curState == nextState) {
            // Do not include transitions which loop back to the
            // current state if we've looped the maximum number
            // of times or greater.
            if (count >= o.maxOccurs &&
                o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
              continue;
            }
          }
          // Do not include transitions which advance past the
          // current state if we have not looped enough times.
          else if (count < o.minOccurs) {
            continue;
          }
        }
        ret.addElement(fElemMap[elemIndex]);
      }
    }
    return ret;
  }

  /**
   * Used by constant space algorithm for a{n,m} for n > 1 and
   * m <= unbounded. Called by a validator if validation of
   * countent model succeeds after subsuming a{n,m} to a*
   * (or a+) to check the n and m bounds.
   * Returns <code>null</code> if validation of bounds is
   * successful. Returns a list of strings with error info
   * if not. Even entries in list returned are error codes
   * (used to look up properties) and odd entries are parameters
   * to be passed when formatting error message. Each parameter
   * is associated with the error code that preceeds it in
   * the list.
   */
  public ArrayList checkMinMaxBounds() {
    ArrayList result = null;
    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
      int count = fElemMapCounter[elemIndex];
      if (count == -1) {
        continue;
      }
      final int minOccurs = fElemMapCounterLowerBound[elemIndex];
      final int maxOccurs = fElemMapCounterUpperBound[elemIndex];
      if (count < minOccurs) {
        if (result == null) {
          result = new ArrayList();
        }
        result.add("cvc-complex-type.2.4.b");
        result.add("{" + fElemMap[elemIndex] + "}");
      }
      if (maxOccurs != -1 && count > maxOccurs) {
        if (result == null) {
          result = new ArrayList();
        }
        result.add("cvc-complex-type.2.4.e");
        result.add("{" + fElemMap[elemIndex] + "}");
      }
    }
    return result;
  }

} // class DFAContentModel
